In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

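`%matplotlib inline` tells the notebook to render Matplotlib figures (which Seaborn draws on) directly below each cell, so no explicit `plt.show()` call is needed. Recent Jupyter versions enable inline rendering by default, so the magic is harmless but usually optional.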


In [2]:
# Read train.csv (path relative to the working directory) into a DataFrame.
train = pd.read_csv(filepath_or_buffer="train.csv")

In [3]:
# Read test.csv (path relative to the working directory) into a DataFrame.
test = pd.read_csv(filepath_or_buffer="test.csv")

We use pandas to load the training and test CSV files into DataFrames.


In [4]:
# Preview the first five rows of the training frame.
train.head(n=5)


Out[4]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition SalePrice
0 1 60 RL 65.0 8450 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 2 2008 WD Normal 208500
1 2 20 RL 80.0 9600 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 5 2007 WD Normal 181500
2 3 60 RL 68.0 11250 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 9 2008 WD Normal 223500
3 4 70 RL 60.0 9550 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 2 2006 WD Abnorml 140000
4 5 60 RL 84.0 14260 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 12 2008 WD Normal 250000

5 rows × 81 columns


In [6]:
# Mean SalePrice for each MSSubClass value (x/y passed by keyword; seaborn >= 0.12 rejects positional data variables).
sns.barplot(x='MSSubClass', y='SalePrice', data=train)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0xb8f2908>

MSSubClass: Identifies the type of dwelling involved in the sale


In [7]:
# Mean SalePrice for each MSZoning value.
sns.barplot(x='MSZoning', y='SalePrice', data=train)


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0xbdbd208>

In [8]:
# SalePrice vs. LotFrontage scatter with a fitted linear regression line.
# Note that the regression line is highly impacted by outliers.
sns.lmplot(x='LotFrontage', y='SalePrice', data=train, fit_reg=True)


Out[8]:
<seaborn.axisgrid.FacetGrid at 0xc0846a0>

In [9]:
# SalePrice vs. LotArea scatter with a fitted linear regression line.
# Note that the regression line is highly impacted by outliers.
sns.lmplot(x='LotArea', y='SalePrice', data=train, fit_reg=True)


Out[9]:
<seaborn.axisgrid.FacetGrid at 0xc31c518>

In [10]:
# Mean SalePrice for each Street value.
sns.barplot(x='Street', y='SalePrice', data=train)


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0xc4244e0>

In [11]:
# Mean SalePrice for each Alley value.
sns.barplot(x='Alley', y='SalePrice', data=train)


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0xcfe0cc0>

In [12]:
# Mean SalePrice for each LotShape value. Category codes:
#   Reg  Regular
#   IR1  Slightly irregular
#   IR2  Moderately irregular
#   IR3  Irregular
sns.barplot(x='LotShape', y='SalePrice', data=train)


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0xd27d320>

In [13]:
# Mean SalePrice for each LandContour value.
sns.barplot(x='LandContour', y='SalePrice', data=train)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0xd478a20>

In [14]:
# Mean SalePrice for each Utilities value.
sns.barplot(x='Utilities', y='SalePrice', data=train)


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0xd5def98>

In [15]:
# Mean SalePrice for each LotConfig value.
sns.barplot(x='LotConfig', y='SalePrice', data=train)


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0xd267160>

In [16]:
# Mean SalePrice for each LandSlope value.
sns.barplot(x='LandSlope', y='SalePrice', data=train)


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0xd5d3e10>

In [17]:
# Mean SalePrice for each Neighborhood value.
sns.barplot(x='Neighborhood', y='SalePrice', data=train)


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0xdc71080>

In [18]:
# Mean SalePrice for each Condition1 value.
sns.barplot(x='Condition1', y='SalePrice', data=train)


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0xe697198>

In [19]:
# Mean SalePrice for each Condition2 value.
sns.barplot(x='Condition2', y='SalePrice', data=train)


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0xe8af9b0>

In [20]:
# Mean SalePrice for each BldgType value.
sns.barplot(x='BldgType', y='SalePrice', data=train)


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0xeb3ff28>

In [21]:
# Mean SalePrice for each HouseStyle value.
sns.barplot(x='HouseStyle', y='SalePrice', data=train)


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0xec4b0b8>

In [105]:
# Mean SalePrice for each OverallQual value.
sns.barplot(x='OverallQual', y='SalePrice', data=train)


Out[105]:
<matplotlib.axes._subplots.AxesSubplot at 0x24d25278>

In [104]:
# Mean SalePrice for each OverallCond value.
# Notice the wide range of sale prices at OverallCond = 5.
# NOTE(review): the original note said this "distorts the regression line", but a
# bar plot draws no regression line; that remark presumably refers to an earlier
# lmplot version of this cell -- confirm.
sns.barplot(x='OverallCond', y='SalePrice', data=train)


Out[104]:
<matplotlib.axes._subplots.AxesSubplot at 0x2444ce10>

In [27]:
# SalePrice vs. YearBuilt scatter with a fitted linear regression line.
sns.lmplot(x='YearBuilt', y='SalePrice', data=train, fit_reg=True)


Out[27]:
<seaborn.axisgrid.FacetGrid at 0x10e43e10>

In [29]:
# SalePrice vs. YearRemodAdd scatter with a fitted linear regression line.
sns.lmplot(x='YearRemodAdd', y='SalePrice', data=train, fit_reg=True)


Out[29]:
<seaborn.axisgrid.FacetGrid at 0x1150a278>

In [31]:
# Mean SalePrice for each RoofStyle value.
sns.barplot(x='RoofStyle', y='SalePrice', data=train)


Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x1214a080>

In [33]:
# Mean SalePrice for each RoofMatl value.
sns.barplot(x='RoofMatl', y='SalePrice', data=train)


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x122f6978>

In [34]:
# Mean SalePrice for each Exterior1st value.
sns.barplot(x='Exterior1st', y='SalePrice', data=train)


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x122f6240>

In [35]:
# Mean SalePrice for each Exterior2nd value.
sns.barplot(x='Exterior2nd', y='SalePrice', data=train)


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x1259f4e0>

In [36]:
# Mean SalePrice for each MasVnrType value.
sns.barplot(x='MasVnrType', y='SalePrice', data=train)


Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x12b77668>

In [40]:
# SalePrice vs. MasVnrArea scatter with a fitted linear regression line.
# The high number of cases with an area of 0 is biasing the regression line.
sns.lmplot(x='MasVnrArea', y='SalePrice', data=train, fit_reg=True)


Out[40]:
<seaborn.axisgrid.FacetGrid at 0x169f2470>

In [41]:
# Mean SalePrice for each ExterQual value.
# It seems that better exterior quality is associated with a higher sale price.
# NOTE(review): the original comment was cut off ("compared to..."); complete the
# comparison if a specific baseline was intended.
sns.barplot(x='ExterQual', y='SalePrice', data=train)


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x17b306a0>

In [45]:
# Exterior condition: mean SalePrice for each ExterCond value.
sns.barplot(x='ExterCond', y='SalePrice', data=train)


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a6e5048>

In [42]:
# Mean SalePrice for each Foundation value.
# Poured concrete is more likely to be associated with higher sale prices.
sns.barplot(x='Foundation', y='SalePrice', data=train)


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x19f867b8>

In [43]:
# Mean SalePrice for each BsmtQual value.
# BsmtQual evaluates the height of the basement.
# There is no "Po" (Poor, <70 inches) category in the plot.
# It seems that a high basement ceiling is associated with a much higher sale price.
sns.barplot(x='BsmtQual', y='SalePrice', data=train)


Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1c6358>

In [44]:
# Mean SalePrice for each BsmtCond value.
# Strange that the good quality basements would be associated with higher Sales Price.
# This may suggest that basement condition is not as important.
# NOTE(review): the first remark reads like the expected relationship; clarify which
# category's position in the plot was surprising.
sns.barplot(x='BsmtCond', y='SalePrice', data=train)


Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a3847f0>

In [46]:
# Mean SalePrice for each BsmtExposure value.
sns.barplot(x='BsmtExposure', y='SalePrice', data=train)


Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a900c88>

In [47]:
# Mean SalePrice for each BsmtFinType1 value.
sns.barplot(x='BsmtFinType1', y='SalePrice', data=train)


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x1aa9a908>

In [49]:
# SalePrice vs. BsmtFinSF1 scatter with a fitted linear regression line.
# The regression line is again impacted by the high number of houses with an area of 0.
sns.lmplot(x='BsmtFinSF1', y='SalePrice', data=train, fit_reg=True)


Out[49]:
<seaborn.axisgrid.FacetGrid at 0x1ac10f98>

In [50]:
# Mean SalePrice for each BsmtFinType2 value.
sns.barplot(x='BsmtFinType2', y='SalePrice', data=train)


Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x18e24860>

In [52]:
# SalePrice vs. BsmtFinSF2 scatter with a fitted linear regression line.
sns.lmplot(x='BsmtFinSF2', y='SalePrice', data=train, fit_reg=True)


Out[52]:
<seaborn.axisgrid.FacetGrid at 0x18ac0cc0>

In [54]:
# SalePrice vs. BsmtUnfSF scatter with a fitted linear regression line.
sns.lmplot(x='BsmtUnfSF', y='SalePrice', data=train, fit_reg=True)


Out[54]:
<seaborn.axisgrid.FacetGrid at 0x1b6dee80>

In [55]:
# SalePrice vs. TotalBsmtSF scatter with a fitted linear regression line.
sns.lmplot(x='TotalBsmtSF', y='SalePrice', data=train, fit_reg=True)


Out[55]:
<seaborn.axisgrid.FacetGrid at 0x1b7d21d0>

In [56]:
# Mean SalePrice for each Heating value.
sns.barplot(x='Heating', y='SalePrice', data=train)


Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x1b860a20>

In [57]:
# Mean SalePrice for each HeatingQC value.
sns.barplot(x='HeatingQC', y='SalePrice', data=train)


Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0x1be200f0>

In [58]:
# Mean SalePrice for each CentralAir value.
sns.barplot(x='CentralAir', y='SalePrice', data=train)


Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bb8b550>

In [59]:
# Mean SalePrice for each Electrical value.
sns.barplot(x='Electrical', y='SalePrice', data=train)


Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bf980b8>

In [60]:
# SalePrice vs. 1stFlrSF scatter with a fitted linear regression line.
sns.lmplot(x='1stFlrSF', y='SalePrice', data=train, fit_reg=True)


Out[60]:
<seaborn.axisgrid.FacetGrid at 0x1d3a7128>

In [61]:
# SalePrice vs. 2ndFlrSF scatter with a fitted linear regression line.
sns.lmplot(x='2ndFlrSF', y='SalePrice', data=train, fit_reg=True)


Out[61]:
<seaborn.axisgrid.FacetGrid at 0x1d4c2160>

In [62]:
# SalePrice vs. LowQualFinSF scatter with a fitted linear regression line.
sns.lmplot(x='LowQualFinSF', y='SalePrice', data=train, fit_reg=True)


Out[62]:
<seaborn.axisgrid.FacetGrid at 0x1d8ee710>

In [63]:
# SalePrice vs. GrLivArea scatter with a fitted linear regression line.
sns.lmplot(x='GrLivArea', y='SalePrice', data=train, fit_reg=True)


Out[63]:
<seaborn.axisgrid.FacetGrid at 0x1db08240>

In [66]:
# Mean SalePrice for each BsmtFullBath value.
sns.barplot(x='BsmtFullBath', y='SalePrice', data=train)


Out[66]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df336d8>

In [67]:
# Mean SalePrice for each BsmtHalfBath value.
sns.barplot(x='BsmtHalfBath', y='SalePrice', data=train)


Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x1eb41a90>

In [71]:
# Mean SalePrice for each BedroomAbvGr value.
sns.barplot(x='BedroomAbvGr', y='SalePrice', data=train)


Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x1b7f42e8>

In [72]:
# Mean SalePrice for each KitchenAbvGr value.
sns.barplot(x='KitchenAbvGr', y='SalePrice', data=train)


Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f447e80>

In [73]:
# Mean SalePrice for each KitchenQual value.
sns.barplot(x='KitchenQual', y='SalePrice', data=train)


Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f617390>

In [75]:
# Mean SalePrice for each TotRmsAbvGrd value.
sns.barplot(x='TotRmsAbvGrd', y='SalePrice', data=train)


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f2a3ac8>

In [76]:
# Mean SalePrice for each Functional value.
sns.barplot(x='Functional', y='SalePrice', data=train)


Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fba2c50>

In [77]:
# Mean SalePrice for each Fireplaces value.
sns.barplot(x='Fireplaces', y='SalePrice', data=train)


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fde2d68>

In [78]:
# Mean SalePrice for each FireplaceQu value.
sns.barplot(x='FireplaceQu', y='SalePrice', data=train)


Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fef1470>

In [79]:
# Mean SalePrice for each GarageType value.
sns.barplot(x='GarageType', y='SalePrice', data=train)


Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x2025e400>

In [81]:
# SalePrice vs. GarageYrBlt scatter with a fitted linear regression line.
sns.lmplot(x='GarageYrBlt', y='SalePrice', data=train, fit_reg=True)


Out[81]:
<seaborn.axisgrid.FacetGrid at 0x203864e0>

In [82]:
# Mean SalePrice for each GarageFinish value.
sns.barplot(x='GarageFinish', y='SalePrice', data=train)


Out[82]:
<matplotlib.axes._subplots.AxesSubplot at 0x21314d30>

In [83]:
# Mean SalePrice for each GarageCars value.
sns.barplot(x='GarageCars', y='SalePrice', data=train)


Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x212ffef0>

In [84]:
# SalePrice vs. GarageArea scatter with a fitted linear regression line.
sns.lmplot(x='GarageArea', y='SalePrice', data=train, fit_reg=True)


Out[84]:
<seaborn.axisgrid.FacetGrid at 0x2174cac8>

In [86]:
# Mean SalePrice for each GarageQual value.
sns.barplot(x='GarageQual', y='SalePrice', data=train)


Out[86]:
<matplotlib.axes._subplots.AxesSubplot at 0x21918940>

In [85]:
# Mean SalePrice for each GarageCond value.
sns.barplot(x='GarageCond', y='SalePrice', data=train)


Out[85]:
<matplotlib.axes._subplots.AxesSubplot at 0x21958978>

In [87]:
# Mean SalePrice for each PavedDrive value.
sns.barplot(x='PavedDrive', y='SalePrice', data=train)


Out[87]:
<matplotlib.axes._subplots.AxesSubplot at 0x21d6e6a0>

In [88]:
# SalePrice vs. WoodDeckSF scatter with a fitted linear regression line.
sns.lmplot(x='WoodDeckSF', y='SalePrice', data=train, fit_reg=True)


Out[88]:
<seaborn.axisgrid.FacetGrid at 0x21ff10f0>

In [89]:
# SalePrice vs. OpenPorchSF scatter with a fitted linear regression line.
sns.lmplot(x='OpenPorchSF', y='SalePrice', data=train, fit_reg=True)


Out[89]:
<seaborn.axisgrid.FacetGrid at 0x2236beb8>

In [90]:
# SalePrice vs. EnclosedPorch scatter with a fitted linear regression line.
sns.lmplot(x='EnclosedPorch', y='SalePrice', data=train, fit_reg=True)


Out[90]:
<seaborn.axisgrid.FacetGrid at 0x225c80b8>

In [91]:
# SalePrice vs. 3SsnPorch scatter with a fitted linear regression line.
sns.lmplot(x='3SsnPorch', y='SalePrice', data=train, fit_reg=True)


Out[91]:
<seaborn.axisgrid.FacetGrid at 0x21fe4550>

In [92]:
# SalePrice vs. ScreenPorch scatter with a fitted linear regression line.
sns.lmplot(x='ScreenPorch', y='SalePrice', data=train, fit_reg=True)


Out[92]:
<seaborn.axisgrid.FacetGrid at 0x22beeb38>

In [93]:
# SalePrice vs. PoolArea scatter with a fitted linear regression line.
sns.lmplot(x='PoolArea', y='SalePrice', data=train, fit_reg=True)


Out[93]:
<seaborn.axisgrid.FacetGrid at 0x22e29080>

In [95]:
# Mean SalePrice for each PoolQC value.
sns.barplot(x='PoolQC', y='SalePrice', data=train)


Out[95]:
<matplotlib.axes._subplots.AxesSubplot at 0x22d0ee48>

In [96]:
# Mean SalePrice for each Fence value.
sns.barplot(x='Fence', y='SalePrice', data=train)


Out[96]:
<matplotlib.axes._subplots.AxesSubplot at 0x232495c0>

In [97]:
# Mean SalePrice for each MiscFeature value.
sns.barplot(x='MiscFeature', y='SalePrice', data=train)


Out[97]:
<matplotlib.axes._subplots.AxesSubplot at 0x236c7780>

In [99]:
# SalePrice vs. MiscVal scatter with a fitted linear regression line.
sns.lmplot(x='MiscVal', y='SalePrice', data=train, fit_reg=True)


Out[99]:
<seaborn.axisgrid.FacetGrid at 0x239d6d68>

In [100]:
# Mean SalePrice for each MoSold value.
sns.barplot(x='MoSold', y='SalePrice', data=train)


Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x23f1b2b0>

In [101]:
# Mean SalePrice for each YrSold value.
sns.barplot(x='YrSold', y='SalePrice', data=train)


Out[101]:
<matplotlib.axes._subplots.AxesSubplot at 0x24226550>

In [102]:
# Mean SalePrice for each SaleType value.
sns.barplot(x='SaleType', y='SalePrice', data=train)


Out[102]:
<matplotlib.axes._subplots.AxesSubplot at 0x245412b0>

In [103]:
# Mean SalePrice for each SaleCondition value.
sns.barplot(x='SaleCondition', y='SalePrice', data=train)


Out[103]:
<matplotlib.axes._subplots.AxesSubplot at 0x2483ac50>